### Replication Package for "Why is Intermediating Houses so Difficult? Evidence from iBuyers"
### Buchak, Matvos, Piskorski, and Seru
###
###
### buchak@stanford.edu

### Creates the iBuyer regional analysis figure (A9.1) and table (A9.2).

library(data.table)
library(ggplot2)
library(zoo)
library(Hmisc)
library(stringr)
library(lfe)
library(stargazer)


# Project helper functions; the path is resolved relative to the current working
# directory. loadData(), used further down, is not defined in this file and
# presumably comes from here -- confirm.
source('0_helper_functions.r')

# Ask data.table to use up to 20 threads for its parallelised operations.
setDTthreads(20)

# Produces Figure A9.1 and Table A9.2.
#
# Figure A9.1: for the post-period (2019) zip-year rows, zips are grouped into 25
#   quantile bins of predicted propensity; the mean realized iBuyer share of each
#   bin is plotted against the bin's mean propensity, with a linear fit.
# Table A9.2: first stage (iBuyer share on propensity) followed by OLS /
#   reduced-form / IV regressions for log sale price (property sales) and for the
#   share of listings off market within two weeks (zip-year Redfin data).
#
# Arguments: none.
# Side effects: writes '../out/figures/A91.png' and '../out/tables/A92.html',
#   creating the two output folders first if they do not exist.
# Returns: the value of the final stargazer() call.
# Depends on loadData(), which is not defined in this file (presumably provided
#   by '0_helper_functions.r' -- confirm).
Figure_A9_1_Table_A9_2 <- function() {

  figure.path <- '../out/figures/A91.png'
  table.path <- '../out/tables/A92.html'

  # A fresh checkout may not contain the output folders; create them up front so
  # the run does not fail at write time after the expensive estimation steps.
  for (out.dir in unique(dirname(c(figure.path, table.path)))) {
    dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)
  }

  # Get entry propensity (zip-level predicted propensity and realized share)
  entry.propensities <- createiBuyerLikelihoodMeasure()
  entry.propensities[, zip5 := as.integer(zip5)]

  # Get liquidity measures: one pre year (2014) and one post year (2019)
  redfin.in <- loadRedfin()
  redfin <- rbind(redfin.in[year == 2014], redfin.in[year == 2019])
  redfin[, post := as.integer(year == 2019)]
  redfin[, zip5 := as.integer(zip)]

  # Merge share and propensity (inner join: zips present in both sources)
  merged <- merge(redfin,
                  entry.propensities[, c('zip5', 'propensity', 'iBuyer.share'), with = FALSE],
                  by = 'zip5')
  merged[, post_x_share := iBuyer.share * post]

  # Regression---liquidity: OLS, reduced form, and IV (post x share
  # instrumented with post x propensity)
  qq1 <- felm(off.two.weeks ~ post * iBuyer.share | zip5 + year, data = merged)
  qq2 <- felm(off.two.weeks ~ post * propensity | zip5 + year, data = merged)
  qq3 <- felm(off.two.weeks ~ 1 | zip5 + year | (post_x_share ~ post * propensity), data = merged)

  # Create first-stage chart: ave() replaces each propensity by the mean of its
  # cut2() quantile group, so `bins` is the bin-level mean propensity.
  post.rows <- merged[post == 1]
  post.rows[, bins := ave(propensity, cut2(propensity, g = 25))]
  by.bin <- post.rows[, list(m = mean(iBuyer.share, na.rm = TRUE)), by = 'bins']
  first.stage.plot <- ggplot(by.bin, aes(x = bins, y = m)) +
    geom_point(color = "black") +
    geom_smooth(method = "lm", se = TRUE, color = "black", fill = "gray", linetype = 'dashed') +
    theme_bw() +
    coord_cartesian(xlim = c(0, 0.10)) +
    xlab('Predicted propensity') +
    ylab('iBuyer share')

  # Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
  ggsave(figure.path, plot = first.stage.plot, height = 6, width = 8, units = 'in', dpi = 300)

  # For houses, do it on per-house basis
  propertyData <- loadData()[!is.na(age.bin) & !is.na(size.bin) & !is.na(multistory)]
  propertyData <- propertyData[house.age < 100 & land_sqft < 1e5 & land_sqft > 200 &
                                 saleamount < 1.5e6 & saleamount > 1e3 &
                                 living_sqft < 10000 & living_sqft > 200 &
                                 !is.na(median.household.income)]

  # Regress sale prices for the same house on iBuyer shares (taking out iBuyer purchases)
  with.propensity <- merge(propertyData, entry.propensities, by = 'zip5')
  with.propensity[, treated_x_post := iBuyer.share * as.numeric(year > 2016)]
  with.propensity[, seller.tenure.years := round(as.numeric(date - seller.buy.date) / 365)]

  # Common estimation sample for the three price regressions, built once.
  price.sample <- with.propensity[year > 2010 & saleamount < 500000 & iBuyer == 0]

  # Fixed effects: house age, year, seller tenure (years), property id, zip.
  # Standard errors clustered by zip.
  pp2 <- felm(log(saleamount) ~ I(year > 2016) * iBuyer.share |
                house.age + year + seller.tenure.years + paste0(pclidirisfrmtd) + zip5 | 0 | zip5,
              data = price.sample)
  pp4 <- felm(log(saleamount) ~ I(year > 2016) * propensity |
                house.age + year + seller.tenure.years + paste0(pclidirisfrmtd) + zip5 | 0 | zip5,
              data = price.sample)
  pp6 <- felm(log(saleamount) ~ 1 |
                house.age + year + seller.tenure.years + paste0(pclidirisfrmtd) + zip5 |
                (treated_x_post ~ I(year > 2016) * propensity) | zip5,
              data = price.sample)

  # Zip-level first stage
  fst <- felm(iBuyer.share ~ propensity, data = entry.propensities)

  # NOTE(review): the 'House age x Tenure FE' row label suggests interacted fixed
  # effects, but the regressions above absorb house.age and seller.tenure.years
  # as separate (additive) fixed effects -- confirm the intended label.
  stargazer(fst, pp2, pp4, pp6, qq1, qq2, qq3,
            type = 'html',
            out = table.path,
            dep.var.labels = c('iBuyer share', 'log(Sale amount)', '% listings sold within 2 weeks'),
            column.labels = c('First stage', 'OLS', 'RF', 'IV', 'OLS', 'RF', 'IV'),
            covariate.labels = c('Propensity', 'Post x Propensity', 'Post', 'Post x Propensity', 'Post',
                                 'iBuyer share', 'Post x iBuyer Share', 'Post x Share (IV)',
                                 'Post x Share', 'Post x Share (IV)', 'Constant'),
            omit.stat = c('f', 'adj.rsq', 'ser'),
            add.lines = list(c('Unit of observation', 'Zip', 'Property sale', 'Property sale', 'Property sale', 'Zip-Year', 'Zip-Year', 'Zip-Year'),
                             c('Zip FE', 'N', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y'),
                             c('Year FE', 'N', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y'),
                             c('Property FE', 'N', 'Y', 'Y', 'Y', 'N', 'N', 'N'),
                             c('House age x Tenure FE', 'N', 'Y', 'Y', 'Y', 'N', 'N', 'N')))

}



# Reads the Redfin CSV extract and collapses single-family rows to one row per
# zip code and calendar year.
#
# Arguments:
#   path  Location of the Redfin CSV. Defaults to the path used by the
#         replication package.
# Returns: a data.table keyed logically by (zip, year) with columns
#   homes.sold, new_listings            sums over the underlying rows
#   sold.above.list, off.two.weeks      means weighted by homes_sold
#   pct.inventory.sold                  mean of homes_sold/inventory weighted by inventory
#   sale_to_list                        mean of avg_sale_to_list weighted by homes_sold
#   median_list_price, median_sale_price  unweighted means of the reported medians
loadRedfin <- function(path = '../data/raw/redfin/redfin_data.csv') {
  data.in <- fread(path)

  # Calendar year of the reporting period start; zip parsed from the region label
  data.in[, year := year(as.Date(period_begin, format = '%Y-%m-%d'))]
  data.in[, zip := as.numeric(gsub(pattern = 'Zip Code\\:', replacement = '', x = region))]

  # Collapse to zip-year.
  # NOTE(review): sale_to_list, median_list_price and median_sale_price are
  # computed without na.rm, so one missing value makes the whole zip-year NA,
  # unlike the other aggregates -- confirm this is intended.
  collapsed <- data.in[property_type == 'Single Family Residential',
                       list(homes.sold = sum(homes_sold, na.rm = TRUE),
                            sold.above.list = weighted.mean(sold_above_list, w = homes_sold, na.rm = TRUE),
                            off.two.weeks = weighted.mean(off_market_in_two_weeks, w = homes_sold, na.rm = TRUE),
                            pct.inventory.sold = weighted.mean(homes_sold / inventory, w = inventory, na.rm = TRUE),
                            new_listings = sum(new_listings, na.rm = TRUE),
                            sale_to_list = weighted.mean(avg_sale_to_list, w = homes_sold),
                            median_list_price = mean(median_list_price),
                            median_sale_price = mean(median_sale_price)),
                       by = c('zip', 'year')]

  collapsed
}


# Reads the Redfin CSV extract and collapses single-family rows to one row per
# zip code and calendar quarter (zoo::yearqtr of the period start date).
#
# Arguments:
#   path  Location of the Redfin CSV.
#         NOTE(review): the default differs from the one loadRedfin() reads
#         ('../data/raw/redfin/redfin_data.csv') -- confirm which layout is
#         current before relying on the default.
# Returns: a data.table with one row per (zip, qtr) and the same aggregate
#   columns as the yearly collapse: homes.sold, sold.above.list, off.two.weeks,
#   pct.inventory.sold, new_listings, sale_to_list, median_list_price,
#   median_sale_price.
loadRedfin_quarterly <- function(path = '../../data/redfin/redfin_data.csv') {
  data.in <- fread(path)

  # Quarter of the reporting period start; zip parsed from the region label
  data.in[, qtr := as.yearqtr(as.Date(period_begin, format = '%Y-%m-%d'))]
  data.in[, zip := as.numeric(gsub(pattern = 'Zip Code\\:', replacement = '', x = region))]

  # Collapse to zip-quarter.
  # NOTE(review): sale_to_list, median_list_price and median_sale_price are
  # computed without na.rm, so one missing value makes the whole zip-quarter NA,
  # unlike the other aggregates -- confirm this is intended.
  collapsed <- data.in[property_type == 'Single Family Residential',
                       list(homes.sold = sum(homes_sold, na.rm = TRUE),
                            sold.above.list = weighted.mean(sold_above_list, w = homes_sold, na.rm = TRUE),
                            off.two.weeks = weighted.mean(off_market_in_two_weeks, w = homes_sold, na.rm = TRUE),
                            pct.inventory.sold = weighted.mean(homes_sold / inventory, w = inventory, na.rm = TRUE),
                            new_listings = sum(new_listings, na.rm = TRUE),
                            sale_to_list = weighted.mean(avg_sale_to_list, w = homes_sold),
                            median_list_price = mean(median_list_price),
                            median_sale_price = mean(median_sale_price)),
                       by = c('zip', 'qtr')]

  collapsed
}





# Creates a zip-level iBuyer likelihood measure based on physical
# characteristics of the house (NOT PRICE!).
#
# Steps:
#   1. Fit a linear probability model of "an iBuyer is the buyer or the seller"
#      on house characteristics, using a random 25% of 2018-2019 transactions.
#   2. Predict that probability for every transaction.
#   3. Average the predictions by zip over 2011-2014 (`propensity`) and compute
#      the realized iBuyer share by zip in 2018 (`iBuyer.share`).
#
# Arguments:
#   seed  Optional seed for the random training split. The default (NULL) leaves
#         the RNG untouched, so the split differs between runs; pass a number to
#         make the output reproducible. Note that supplying a seed resets the
#         session's global RNG state via set.seed().
# Returns: a data.table with one row per zip5 present in BOTH windows (inner
#   merge), with columns zip5, propensity, propensity.weighted.price.early,
#   iBuyer.share and propensity.weighted.price.late.
# Depends on loadData(), which is not defined in this file (presumably provided
#   by '0_helper_functions.r' -- confirm).
createiBuyerLikelihoodMeasure <- function(seed = NULL) {

  propertyData <- loadData()[!is.na(age.bin) & !is.na(size.bin) & !is.na(multistory)]
  propertyData <- propertyData[house.age < 100 & land_sqft < 1e5 & land_sqft > 200 &
                                 saleamount < 1.5e6 & saleamount > 1e3 &
                                 living_sqft < 10000 & living_sqft > 200 &
                                 !is.na(median.household.income)]

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Random 25% of 2018-2019 transactions form the training sample
  propertyData[, train := 0]
  propertyData[year %in% 2018:2019 & runif(nrow(propertyData)) < .25, train := 1]

  # Create the `is ibuyer` variable: 1 if an iBuyer is on either side of the sale.
  # NOTE(review): an earlier comment said only markets the iBuyers had entered
  # are used; no such market filter is applied here -- confirm.
  propertyData[, buyer.or.seller := as.integer(iBuyer.buyer | iBuyer.seller)]

  # Degree of the polynomials in age, log lot size and log living area
  poly.degree <- 4

  # Run the model & predict
  mm <- lm(buyer.or.seller ~ ((age.bin + size.bin + multistory) +
                                poly(house.age, poly.degree) +
                                poly(log(land_sqft), poly.degree) +
                                poly(log(living_sqft), poly.degree)),
           data = propertyData[train == 1])
  propertyData[, iBuyer.prediction.house := predict(mm, propertyData)]

  # Early window: average predicted propensity by zip. `train == 0` always holds
  # here because training rows are only drawn from 2018-2019.
  byZip.early <- propertyData[year %in% 2011:2014 & train == 0,
                              list(propensity = mean(iBuyer.prediction.house, na.rm = TRUE),
                                   propensity.weighted.price.early = weighted.mean(saleamount, w = iBuyer.prediction.house, na.rm = TRUE)),
                              by = 'zip5']
  # Late window: realized iBuyer share by zip in 2018 (training rows included)
  byZip.late <- propertyData[year %in% 2018,
                             list(iBuyer.share = mean(buyer.or.seller, na.rm = TRUE),
                                  propensity.weighted.price.late = weighted.mean(saleamount, w = iBuyer.prediction.house, na.rm = TRUE)),
                             by = 'zip5']
  byZip <- merge(byZip.early, byZip.late, by = 'zip5')

  byZip

}

# Run the analysis as soon as this file is executed or sourced.
Figure_A9_1_Table_A9_2()

